************************************************
* Title: To prepare State Population Data 	   *
* Authors: Peter Kuhn, Trevor Osaki, Lei Yue   *
* Date: September 2024                         *
************************************************
* Start with no dataset in memory
clear

* Root folder of the replication package (machine-specific absolute path; edit before running elsewhere).
* NOTE(review): the path is wrapped in doubled double quotes - confirm that the
* unquoted cd calls on this global resolve it as intended.
global homedir ""/Users/leiyue/Desktop/Projects/WhenIsDicrimUnfair/Replication_Final""  

* Move to the project root, then into the dataset-construction subfolder.
* The cap prefix suppresses the error if the second cd fails, in which case
* the script carries on from the project root.
cd $homedir
cap cd "1_Dataset Construction"

* Import the rows of the raw file that hold the state observations
* (file rows 13-63); columns 2-4 are read in as strings
import delimited "kk_census_state_pop2019.csv", rowrange(13:63) stringcols(2 3 4) 

* Columns 2 and 4 are not needed
drop v2 v4

* Column 1 serves as the state ID
rename v1 state

* Convert the population string into a new numeric variable,
* ignoring the comma thousands separators
destring v3, gen(population) ignore(",")

drop v3

* Express population in thousands, rounded to the nearest whole thousand
replace population = round(population/1000,1)

* Keep states with at least 5 million residents (population is in thousands).
* Stata treats numeric missing as larger than any number, so a missing
* population would pass ">= 5000"; exclude missing values explicitly.
keep if population >= 5000 & !missing(population)

* Recode the state ID from the raw name (which carries a leading period)
* to a short lowercase code. Each entry below is "code:raw name".
* NOTE(review): only these 17 states are recoded; any other state that passes
* the population filter keeps its raw name - confirm this is intended.
foreach entry in "az:.Arizona" "ca:.California" "fl:.Florida" ///
	"ga:.Georgia" "il:.Illinois" "ind:.Indiana" ///
	"ma:.Massachusetts" "mi:.Michigan" "nj:.New Jersey" ///
	"ny:.New York" "nc:.North Carolina" "oh:.Ohio" ///
	"pa:.Pennsylvania" "tn:.Tennessee" "tx:.Texas" ///
	"va:.Virginia" "wa:.Washington" {
	* Split the entry at the colon into the code and the raw name
	local cut = strpos("`entry'", ":")
	local code = substr("`entry'", 1, `cut' - 1)
	local rawname = substr("`entry'", `cut' + 1, .)
	replace state = "`code'" if state == "`rawname'"
}

* Build one 0/1 indicator variable per recoded state, named after its code
local codes az ca fl ga il ind ma mi nj ny nc oh pa tn tx va wa
foreach code of local codes {
	generate `code' = cond(state == "`code'", 1, 0)
}

***************** SAVE DATA
* Return to the project root so the relative output path below resolves
cd $homedir

* Write the prepared state data into the main-analyses folder,
* overwriting any existing copy
save "2_Main Analyses/d7_state_pop.dta", replace
